import ipywidgets as widgets
from ipywidgets import interact, interact_manual
import pandas as pd
# Read the white-wine quality CSV into a DataFrame and preview the first rows.
wine_data = pd.read_csv(filepath_or_buffer='winequality-white.csv')
wine_data.head(n=5)
fixed acidity volatile acidity citric acid residual sugar chlorides free sulfur dioxide total sulfur dioxide density pH sulphates alcohol quality
0 7.0 0.27 0.36 20.7 0.045 45.0 170.0 1.0010 3.00 0.45 8.8 6
1 6.3 0.30 0.34 1.6 0.049 14.0 132.0 0.9940 3.30 0.49 9.5 6
2 8.1 0.28 0.40 6.9 0.050 30.0 97.0 0.9951 3.26 0.44 10.1 6
3 7.2 0.23 0.32 8.5 0.058 47.0 186.0 0.9956 3.19 0.40 9.9 6
4 7.2 0.23 0.32 8.5 0.058 47.0 186.0 0.9956 3.19 0.40 9.9 6
# Show the column labels of the loaded DataFrame.
wine_data.columns
Index(['fixed acidity', 'volatile acidity', 'citric acid', 'residual sugar',
       'chlorides', 'free sulfur dioxide', 'total sulfur dioxide', 'density',
       'pH', 'sulphates', 'alcohol', 'quality'],
      dtype='object')
# Preview the last five rows.
wine_data.tail(n=5)
fixed acidity volatile acidity citric acid residual sugar chlorides free sulfur dioxide total sulfur dioxide density pH sulphates alcohol quality
4893 6.2 0.21 0.29 1.6 0.039 24.0 92.0 0.99114 3.27 0.50 11.2 6
4894 6.6 0.32 0.36 8.0 0.047 57.0 168.0 0.99490 3.15 0.46 9.6 5
4895 6.5 0.24 0.19 1.2 0.041 30.0 111.0 0.99254 2.99 0.46 9.4 6
4896 5.5 0.29 0.30 1.1 0.022 20.0 110.0 0.98869 3.34 0.38 12.8 7
4897 6.0 0.21 0.38 0.8 0.020 22.0 98.0 0.98941 3.26 0.32 11.8 6
# Number of rows in the dataset.
wine_data.shape[0]
4898
# Summary statistics (count, mean, std, min, quartiles, max) for each column.
wine_data.describe()
fixed acidity volatile acidity citric acid residual sugar chlorides free sulfur dioxide total sulfur dioxide density pH sulphates alcohol quality
count 4898.000000 4898.000000 4898.000000 4898.000000 4898.000000 4898.000000 4898.000000 4898.000000 4898.000000 4898.000000 4898.000000 4898.000000
mean 6.854788 0.278241 0.334192 6.391415 0.045772 35.308085 138.360657 0.994027 3.188267 0.489847 10.514267 5.877909
std 0.843868 0.100795 0.121020 5.072058 0.021848 17.007137 42.498065 0.002991 0.151001 0.114126 1.230621 0.885639
min 3.800000 0.080000 0.000000 0.600000 0.009000 2.000000 9.000000 0.987110 2.720000 0.220000 8.000000 3.000000
25% 6.300000 0.210000 0.270000 1.700000 0.036000 23.000000 108.000000 0.991723 3.090000 0.410000 9.500000 5.000000
50% 6.800000 0.260000 0.320000 5.200000 0.043000 34.000000 134.000000 0.993740 3.180000 0.470000 10.400000 6.000000
75% 7.300000 0.320000 0.390000 9.900000 0.050000 46.000000 167.000000 0.996100 3.280000 0.550000 11.400000 6.000000
max 14.200000 1.100000 1.660000 65.800000 0.346000 289.000000 440.000000 1.038980 3.820000 1.080000 14.200000 9.000000

distplot

import seaborn as sns
import matplotlib.pyplot as plt
# Reload the dataset and switch the column labels to snake_case so that each
# column can be reached with attribute access (e.g. wine_data.fixed_acidity).
wine_data = pd.read_csv('winequality-white.csv')
wine_data.columns = wine_data.columns.str.replace(' ', '_')
wine_data.tail(10)
fixed_acidity volatile_acidity citric_acid residual_sugar chlorides free_sulfur_dioxide total_sulfur_dioxide density pH sulphates alcohol quality
4888 6.8 0.220 0.36 1.20 0.052 38.0 127.0 0.99330 3.04 0.54 9.2 5
4889 4.9 0.235 0.27 11.75 0.030 34.0 118.0 0.99540 3.07 0.50 9.4 6
4890 6.1 0.340 0.29 2.20 0.036 25.0 100.0 0.98938 3.06 0.44 11.8 6
4891 5.7 0.210 0.32 0.90 0.038 38.0 121.0 0.99074 3.24 0.46 10.6 6
4892 6.5 0.230 0.38 1.30 0.032 29.0 112.0 0.99298 3.29 0.54 9.7 5
4893 6.2 0.210 0.29 1.60 0.039 24.0 92.0 0.99114 3.27 0.50 11.2 6
4894 6.6 0.320 0.36 8.00 0.047 57.0 168.0 0.99490 3.15 0.46 9.6 5
4895 6.5 0.240 0.19 1.20 0.041 30.0 111.0 0.99254 2.99 0.46 9.4 6
4896 5.5 0.290 0.30 1.10 0.022 20.0 110.0 0.98869 3.34 0.38 12.8 7
4897 6.0 0.210 0.38 0.80 0.020 22.0 98.0 0.98941 3.26 0.32 11.8 6
# Plain matplotlib histogram of the alcohol column.
plt.hist(wine_data['alcohol'])
(array([ 37., 808., 969., 761., 765., 625., 427., 368., 110.,  28.]),
 array([ 8.  ,  8.62,  9.24,  9.86, 10.48, 11.1 , 11.72, 12.34, 12.96,
        13.58, 14.2 ]),
 <a list of 10 Patch objects>)
# Histogram bars overlaid with a KDE (Gaussian kernel density estimate) curve.
sns.distplot(wine_data['alcohol'], hist=True, kde=True)
<matplotlib.axes._subplots.AxesSubplot at 0x1a77b2c8208>

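The KDE curve peaks at a density of roughly 0.4 near an alcohol content of about 9.3. Note that the y-axis is a probability density, so this does not mean that 40% of the wines have that alcohol content.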

# Same histogram + KDE, drawn on a wider 15x5 figure.
f, ax = plt.subplots(figsize=(15, 5))
sns.distplot(wine_data['alcohol'], kde=True, ax=ax)
<matplotlib.axes._subplots.AxesSubplot at 0x1a77b3b8b38>
# Histogram with 200 bins and a rug of individual observations, no KDE curve.
f, ax = plt.subplots(figsize=(15, 5))
sns.distplot(wine_data['alcohol'], bins=200, rug=True, kde=False, ax=ax)
<matplotlib.axes._subplots.AxesSubplot at 0x1a77b5d20f0>

rug: Whether to draw a rugplot on the support axis.

bins controls the granularity of the bars: more bins give narrower bars, so the distribution can be examined in finer detail.

# Rug plot only; most of the ticks cluster around the centre of the range.
f, ax = plt.subplots(figsize=(15, 5))
sns.rugplot(wine_data['alcohol'], height=0.75, ax=ax)
<matplotlib.axes._subplots.AxesSubplot at 0x1a77b5cc5f8>
# KDE curve with a rug underneath; the histogram bars are switched off.
f, ax = plt.subplots(figsize=(15, 5))
sns.distplot(wine_data['alcohol'], hist=False, kde=True, rug=True, ax=ax)
<matplotlib.axes._subplots.AxesSubplot at 0x1a77be83160>

Shade the area under the kernel density estimate

# Red KDE curve with the area underneath shaded.
f, ax = plt.subplots(figsize=(15, 5))
sns.set(color_codes=True)  # remap single-letter colour codes such as 'r'
sns.kdeplot(wine_data['alcohol'], color='r', shade=True, ax=ax)
<matplotlib.axes._subplots.AxesSubplot at 0x1a77c2570f0>

plotting KDE with various bandwidth that fits the original data

# Overlay the default KDE with curves for several explicit bandwidths.
f, ax = plt.subplots(figsize=(15, 5))
sns.kdeplot(wine_data.alcohol)
for bandwidth in (0.04, 0.2, 2, 5):
    sns.kdeplot(wine_data.alcohol, bw=bandwidth, label=f'bw ={bandwidth} ')
<matplotlib.axes._subplots.AxesSubplot at 0x1a77c2c5828>

the more erratic the plot is the less generic information it provides

Joint Plots along with univariate distribution

# Joint plot of free vs. total sulfur dioxide with the univariate
# distributions on the margins.
# Axis limits are (low, high) tuples: a set has no defined order, so it
# cannot reliably express which bound is which.
sns.jointplot(x=wine_data.free_sulfur_dioxide, y=wine_data.total_sulfur_dioxide,
              xlim=(0, 150), ylim=(0, 400), alpha=0.2)  # ALPHA STILL WORKS HERE
<seaborn.axisgrid.JointGrid at 0x1a77c772c50>

Hexbin Plots - Histogram representation of bivariate plot

The problem with the joint plot above is that, in the dense middle region, we cannot make out the relationship between wine_data.free_sulfur_dioxide and wine_data.total_sulfur_dioxide. The solution is a hexbin plot, where the shade of each hexagon varies with the number of points it contains.

How dark or light a particular hexagon is represents the height of the histogram at that intersection.

# Hexbin version of the joint plot. Limits are ordered (low, high) tuples
# rather than sets, whose iteration order is not guaranteed.
sns.jointplot(x=wine_data.free_sulfur_dioxide, y=wine_data.total_sulfur_dioxide,
              kind='hex', xlim=(0, 100), ylim=(0, 200))
<seaborn.axisgrid.JointGrid at 0x1a77c7357f0>

KDE curves for bivariate distribution

# Bivariate KDE version of the joint plot. Limits are ordered (low, high)
# tuples rather than sets, whose iteration order is not guaranteed.
sns.jointplot(x=wine_data.free_sulfur_dioxide,
              y=wine_data.total_sulfur_dioxide,
              kind='kde', xlim=(0, 80), ylim=(0, 300))
<seaborn.axisgrid.JointGrid at 0x1a77ca967f0>
# Bivariate KDE of the two sulfur dioxide columns, with a green rug for the
# x variable and a blue vertical rug for the y variable.
f, ax = plt.subplots(figsize=(8, 5))
sns.kdeplot(wine_data.free_sulfur_dioxide, wine_data.total_sulfur_dioxide, ax=ax)
sns.rugplot(wine_data.free_sulfur_dioxide, height=0.05, color='g', ax=ax)
sns.rugplot(wine_data.total_sulfur_dioxide, height=0.05, color='b',
            vertical=True, ax=ax)
ax.set_xlim(-20, 100)
ax.set_ylim(-50, 400)
(-50, 400)

plot bivariate relationships between every pair of columns in a dataset

# sns.pairplot(wine_data, height=3)  # all columns at once: takes a lot of time
# Restrict the pair plot to five columns and draw KDEs on the diagonal.
pair_columns = ['fixed_acidity', 'chlorides', 'sulphates', 'alcohol', 'quality']
sns.pairplot(wine_data, vars=pair_columns, diag_kind='kde', height=3)
<seaborn.axisgrid.PairGrid at 0x1a77d16eb38>

pairwise regressions

# Pair plot with a regression fit in every panel.
sns.pairplot(
    wine_data,
    vars=['chlorides', 'sulphates', 'quality'],
    kind='reg',
    height=3,
)
<seaborn.axisgrid.PairGrid at 0x1a77ee865c0>
# Build the grid explicitly and draw a matplotlib scatter in every cell,
# including the diagonal.
g = sns.PairGrid(
    wine_data,
    vars=['chlorides', 'sulphates', 'quality'],
    height=3,
)
g.map(plt.scatter)
<seaborn.axisgrid.PairGrid at 0x1a77f911550>

Plotting a variable against itself is not informative, so let's plot a KDE on the diagonal and scatter plots on the off-diagonal panels.

# KDE on the diagonal, scatter everywhere else, coloured by quality.
g = sns.PairGrid(wine_data, hue='quality',
                 vars=['chlorides', 'sulphates', 'alcohol'])
g.map_offdiag(plt.scatter)
g.map_diag(sns.kdeplot)
plt.legend(loc='upper left')  # same location as the numeric code 2
<matplotlib.legend.Legend at 0x1a77fe86438>

Now how about tuning PairGrid even further with map_upper, map_lower and map_diag?

# A different plot type for each region of the grid.
g = sns.PairGrid(data=wine_data, vars=['chlorides', 'sulphates', 'alcohol'])

g.map_diag(sns.kdeplot)       # diagonal: univariate KDE
g.map_upper(sns.scatterplot)  # upper triangle: scatter
g.map_lower(sns.regplot)      # lower triangle: scatter with regression fit
<seaborn.axisgrid.PairGrid at 0x1a701628940>

How about using PairGrid with different x_vars and y_vars?

# One row of panels: alcohol on the y axis against three different x columns.
g = sns.PairGrid(
    wine_data,
    x_vars=['chlorides', 'sulphates', 'fixed_acidity'],
    y_vars=['alcohol'],
)
g.map(plt.scatter)

# Alternative per-region mappings, left disabled:
# g.map_diag(sns.kdeplot)
# g.map_upper(sns.scatterplot)
# g.map_lower(sns.regplot)
<seaborn.axisgrid.PairGrid at 0x1a701b7ba58>
# Same grid, drawn with seaborn's own scatterplot instead of plt.scatter.
g = sns.PairGrid(
    wine_data,
    x_vars=['chlorides', 'sulphates', 'fixed_acidity'],
    y_vars=['alcohol'],
)
g.map(sns.scatterplot)  # sns looks more beautiful
<seaborn.axisgrid.PairGrid at 0x1a701e0d978>
# sns.pairplot(wine_data, height=3, vars=['fixed_acidity','chlorides','sulphates', 'quality'], hue = 'pH')

Heatmaps

# Annotated heatmap of the pairwise column correlations.
corrmat = wine_data.corr()
f, ax = plt.subplots(figsize=(10, 10))
# Alternative with explicit colour limits and colormap, left disabled:
# sns.heatmap(corrmat, vmin = -0.9, vmax=0.95, square = True, annot= True, fmt='.2f', cmap='summer' )
sns.heatmap(corrmat, square=True, annot=True, fmt='0.2f', ax=ax)
<matplotlib.axes._subplots.AxesSubplot at 0x1a701ecf940>

lmplots and regplots for regression

# Regression of density on residual sugar, drawn on a wide figure.
sns.lmplot(data=wine_data, x='residual_sugar', y='density',
           height=7, aspect=2)
<seaborn.axisgrid.FacetGrid at 0x1a701c1b898>

Where the MSE is low the confidence interval (CI) is narrow, and wherever the MSE is high the CI is wide.

categorical variable with discrete values

# Regression of alcohol on quality; quality only takes discrete values.
sns.lmplot(data=wine_data, x='quality', y='alcohol')
<seaborn.axisgrid.FacetGrid at 0x1a7024a9390>

hard to see individual data points so add some jittering - regression line is not affected by jittering

# Same regression with horizontal jitter so individual points are visible.
sns.lmplot(data=wine_data, x='quality', y='alcohol', x_jitter=0.2)
<seaborn.axisgrid.FacetGrid at 0x1a702705940>
import numpy as np
# Collapse the observations at each quality value to their mean alcohol.
sns.lmplot(data=wine_data, x='quality', y='alcohol', x_estimator=np.mean)
<seaborn.axisgrid.FacetGrid at 0x1a7022fec88>

Higher quality means higher alcohol content

# sns.lmplot(x='pH',y='fixed_acidity', data= wine_data, row='quality', hue='alcohol') # Fixed_acidity vs pH in each quality category with hue= alcohol : needs tuning
# sns.lmplot(x='pH',y='fixed_acidity', data= wine_data, col='quality', hue='alcohol') # Fixed_acidity vs pH in each quality category with hue= alcohol

regplot

regplot is similar to lmplot but accepts inputs in various forms, such as NumPy arrays, pandas Series, or variable names that refer to columns of a DataFrame.

regplot is axis level v/s lmplot is figure level

lmplot is more powerful and operates at a higher level on top of matplotlib.

# Axes-level regression of density on alcohol, drawn in black.
# x and y are passed by keyword, matching the other regplot/jointplot calls
# in this notebook; newer seaborn releases no longer accept them positionally.
sns.regplot(x=wine_data.alcohol, y=wine_data.density, color='0')
<matplotlib.axes._subplots.AxesSubplot at 0x1a7027f7eb8>
# regplot is axes-level, so the size and shape come from the Axes it is given.
f, ax = plt.subplots(figsize=(12, 5))
sns.regplot(
    x=wine_data['residual_sugar'],
    y=wine_data['density'],
    ax=ax,
)
<matplotlib.axes._subplots.AxesSubplot at 0x1a70285fc88>

joint plots also plots regression if kind is specified as 'reg'

# Joint plot with a regression fit. Limits are ordered (low, high) tuples
# rather than sets, whose iteration order is not guaranteed.
sns.jointplot(x=wine_data.free_sulfur_dioxide, y=wine_data.total_sulfur_dioxide,
              xlim=(0, 150), ylim=(0, 400), kind='reg')
<seaborn.axisgrid.JointGrid at 0x1a70282f2e8>
# Regression of alcohol against three other columns, one panel each.
sns.pairplot(
    wine_data,
    x_vars=['fixed_acidity', 'citric_acid', 'chlorides'],
    y_vars=['alcohol'],
    kind='reg',
    height=8,
    aspect=1,
)
<seaborn.axisgrid.PairGrid at 0x1a703abef98>

CATEGORICAL AND MULTIPANEL DATA

# Strip plot: every alcohol observation, grouped by quality.
f, ax = plt.subplots(figsize=(15, 5))
sns.stripplot(data=wine_data, x='quality', y='alcohol', ax=ax)
<matplotlib.axes._subplots.AxesSubplot at 0x1a703fabe48>
# Swarm plot of alcohol for each quality value.
sns.swarmplot(data=wine_data, x='quality', y='alcohol')
<matplotlib.axes._subplots.AxesSubplot at 0x1a7043a5438>
# Seaborn easter egg; it takes no data and is unrelated to the wine analysis.
# NOTE(review): presumably fetches an image over the network — confirm before
# running this cell offline.
sns.dogplot() # haha 

BOX PLOT

# Box plot of alcohol for each quality value.
sns.boxplot(data=wine_data, x='quality', y='alcohol')
<matplotlib.axes._subplots.AxesSubplot at 0x1a7044a48d0>

Whiskers mostly extend to 1.5 times the interquartile range; any data outside this range is shown separately as an outlier.

VIOLIN PLOT with distribution probability

# Violin plot of alcohol for each quality value.
sns.violinplot(data=wine_data, x='quality', y='alcohol')
<matplotlib.axes._subplots.AxesSubplot at 0x1a7047d04e0>

The curved boundaries represent the KDE, so a violin plot also shows the mode. Violin plots can also be scaled relative to the number of observations in each group.

# Violin widths scaled by the number of observations in each quality group.
sns.violinplot(data=wine_data, x='quality', y='alcohol', scale='count')
<matplotlib.axes._subplots.AxesSubplot at 0x1a704b188d0>

here you can scale the width with respect to number of counts of each categorical observation

# Count-scaled violins with one stick drawn per observation inside.
sns.violinplot(data=wine_data, x='quality', y='alcohol',
               inner='stick', scale='count')
<matplotlib.axes._subplots.AxesSubplot at 0x1a704a33ef0>

Thick, closely packed lines suggest that there are many samples in our data with quality 6 at an alcohol level around 11.5.

COMBINING SWARM AND VIOLIN PLOTS

# Draw the violins first, then overlay the swarm on the same axes.
for plot_func in (sns.violinplot, sns.swarmplot):
    plot_func(x='quality', y='alcohol', data=wine_data)
<matplotlib.axes._subplots.AxesSubplot at 0x1a709101dd8>

STATISTICAL ESTIMATION

# Bar plot of pH for each quality value.
sns.barplot(data=wine_data, x='quality', y='pH')
<matplotlib.axes._subplots.AxesSubplot at 0x1a708979550>
# Number of rows for every distinct alcohol value.
sns.countplot(data=wine_data, x='alcohol')
<matplotlib.axes._subplots.AxesSubplot at 0x1a7089f98d0>
# Count plot of alcohol values coloured with the 'Greens_d' palette.
# Only `palette` is passed: the previous `color='m'` conflicted with it and
# had no effect, because a palette takes precedence over a single colour.
sns.countplot(x='alcohol', data=wine_data, palette='Greens_d')
# Rotate the tick labels so the many alcohol values stay readable.
plt.xticks(rotation=90)
(array([  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,
         13,  14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,
         26,  27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,
         39,  40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,
         52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,  64,
         65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,
         78,  79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,  90,
         91,  92,  93,  94,  95,  96,  97,  98,  99, 100, 101, 102]),
 <a list of 103 Text xticklabel objects>)

Number of wines at every alcohol level

POINT PLOT

A point plot represents an estimate of central tendency for a numeric variable by the position of scatter plot points and provides some indication of the uncertainty around that estimate using error bars.

# Point plot of pH for each quality value.
sns.pointplot(data=wine_data, x='quality', y='pH')
<matplotlib.axes._subplots.AxesSubplot at 0x1a7022f1390>

Observe how the pH estimate varies with the quality of the wine.

BOX PLOT

# Horizontal box plots built from the whole DataFrame at once.
sns.boxplot(orient='h', data=wine_data)
<matplotlib.axes._subplots.AxesSubplot at 0x1a704a16630>

Seaborn is a powerful visualization library built on top of matplotlib ; tightly integrated with PyData stack and makes production ready plots

Facet grids in Seaborn

# Load the Titanic passenger data and preview the first rows.
titanic = pd.read_csv(filepath_or_buffer='titanic.csv')
titanic.head(n=5)
PassengerId Survived Pclass Name Sex Age SibSp Parch Ticket Fare Cabin Embarked
0 1 0 3 Braund, Mr. Owen Harris male 22.0 1 0 A/5 21171 7.2500 NaN S
1 2 1 1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 0 PC 17599 71.2833 C85 C
2 3 1 3 Heikkinen, Miss. Laina female 26.0 0 0 STON/O2. 3101282 7.9250 NaN S
3 4 1 1 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 0 113803 53.1000 C123 S
4 5 0 3 Allen, Mr. William Henry male 35.0 0 0 373450 8.0500 NaN S
# A grid created without mapping any plot onto it (try it and see!).
g = sns.FacetGrid(titanic, col='Pclass')
# One column of facets per Survived value, each with a black Pclass histogram.
g = sns.FacetGrid(data=titanic, col='Survived')
g.map(plt.hist, 'Pclass', color='k')
<seaborn.axisgrid.FacetGrid at 0x1a7029fde48>

Plotting bivariate data

# Age vs. Fare scatter, one facet per Survived value.
g = sns.FacetGrid(data=titanic, col='Survived', height=8)
g.map(plt.scatter, 'Age', 'Fare')
<seaborn.axisgrid.FacetGrid at 0x1a70aea7198>
# Same scatter, coloured by Sex and with a legend for the hue levels.
g = sns.FacetGrid(data=titanic, col='Survived', hue='Sex', height=8)
g.map(plt.scatter, 'Age', 'Fare', alpha=0.99)  # alpha is the opacity
g.add_legend()
<seaborn.axisgrid.FacetGrid at 0x1a70b438588>
# matplotlib bars of Fare at each Age, coloured by Sex, per Survived value.
g = sns.FacetGrid(data=titanic, col='Survived', hue='Sex', height=8)
g.map(plt.bar, 'Age', 'Fare', alpha=0.99)  # alpha is the opacity
g.add_legend()
<seaborn.axisgrid.FacetGrid at 0x1a70b4389e8>
# Add Pclass as the row dimension: one facet per (Pclass, Survived) pair.
g = sns.FacetGrid(data=titanic, row='Pclass', col='Survived',
                  hue='Sex', height=8)
g.map(plt.bar, 'Age', 'Fare', alpha=0.99)  # alpha is the opacity
g.add_legend()
<seaborn.axisgrid.FacetGrid at 0x1a70ada8208>
# seaborn bar plot of Fare per Age in every (Pclass, Survived) facet, by Sex.
# A fixed `order` gives every facet and hue layer the same category
# positions; without it seaborn warns that the plot is likely incorrect.
age_order = sorted(titanic['Age'].dropna().unique())
g = sns.FacetGrid(titanic, col='Survived', row='Pclass', height=8, hue='Sex')
g.map(sns.barplot, 'Age', 'Fare', order=age_order)
c:\python37\lib\site-packages\seaborn\axisgrid.py:715: UserWarning: Using the barplot function without specifying `order` is likely to produce an incorrect plot.
  warnings.warn(warning)
<seaborn.axisgrid.FacetGrid at 0x1a70da390b8>
# Age vs. Fare scatter per (Pclass, Survived) facet, coloured by Sex.
g = sns.FacetGrid(data=titanic, row='Pclass', col='Survived',
                  hue='Sex', height=8)
g.map(plt.scatter, 'Age', 'Fare', alpha=0.99)  # alpha is the opacity
g.add_legend()
<seaborn.axisgrid.FacetGrid at 0x1a70df7d630>
# Pick the hue colours explicitly: blue for male, red for female.
h = dict(male='b', female='r')
g = sns.FacetGrid(data=titanic, row='Pclass', col='Survived',
                  hue='Sex', palette=h, height=8)
g.map(plt.scatter, 'Age', 'Fare', alpha=0.99)  # alpha is the opacity
g.add_legend()
<seaborn.axisgrid.FacetGrid at 0x1a70ece2f28>

NOW HOW ABOUT SOME FINE TUNInG!!!

# Bar plot of Fare per Age for each Survived value, wrapping the facets after
# four columns.
# `order` pins the Age categories so every facet shares the same bar
# positions; seaborn warns that the plot is likely incorrect without it.
age_order = sorted(titanic['Age'].dropna().unique())
g = sns.FacetGrid(titanic, col='Survived', height=8, col_wrap=4)
g.map(sns.barplot, 'Age', 'Fare', order=age_order)
<seaborn.axisgrid.FacetGrid at 0x1a70ef77080>
# Bar plot of Fare per Age per (Pclass, Survived) facet with custom bar styling.
# `order` pins the Age categories so every facet and hue layer shares the
# same bar positions (seaborn warns the plot is likely incorrect otherwise).
age_order = sorted(titanic['Age'].dropna().unique())
g = sns.FacetGrid(titanic, col='Survived', row='Pclass', height=8, hue='Sex')
g.map(sns.barplot, 'Age', 'Fare', color='#334488',
      edgecolor='red', lw=.5, order=age_order)  # color is a hex parameter
# Rotate the tick labels through the grid: plt.xticks() would only change
# the axes that happens to be current, not every facet.
g.set_xticklabels(rotation=90)
# giving some white space and horizontal spacing
g.fig.subplots_adjust(wspace=0.3, hspace=0.5)

Changing labels

# Same tuned bar plot without the hue split, plus custom axis labels.
# `order` pins the Age categories so every facet shares the same bar
# positions (seaborn warns the plot is likely incorrect otherwise).
age_order = sorted(titanic['Age'].dropna().unique())
g = sns.FacetGrid(titanic, col='Survived', row='Pclass', height=8)
g.map(sns.barplot, 'Age', 'Fare', color='#334488',
      edgecolor='red', lw=.5, order=age_order)  # color is a hex parameter
# Rotate the tick labels through the grid: plt.xticks() would only change
# the axes that happens to be current, not every facet.
g.set_xticklabels(rotation=90)
# giving some white space and horizontal spacing
g.fig.subplots_adjust(wspace=0.3, hspace=0.5)
g.set_axis_labels('Age of Passengers', 'Fare of each Passenger')
<seaborn.axisgrid.FacetGrid at 0x1a7140d92e8>

Is this bar graph showing the frequency of fares? No! It only gives one fare value for each age bucket. To be revisited later.

Customizing the yticks

# Age vs. Fare scatter per Survived value, coloured by Sex.
g = sns.FacetGrid(data=titanic, col='Survived', hue='Sex', height=8)
g.map(sns.scatterplot, 'Age', 'Fare')
# Spacing between the facets.
g.fig.subplots_adjust(wspace=0.3, hspace=0.5)
g.set_axis_labels('Age of Passengers', 'Fare of each Passenger')
# Put a y tick at every multiple of 50 from 0 to 500.
g.set(yticks=list(range(0, 501, 50)))
<seaborn.axisgrid.FacetGrid at 0x1a7125ef9e8>

xlim and ylim - limit your x and y ticks

# Restrict both axes through the FacetGrid constructor arguments.
g = sns.FacetGrid(data=titanic, col='Survived', hue='Sex', height=5,
                  xlim=(0, 16), ylim=(100, 500))
g.map(sns.scatterplot, 'Age', 'Fare')
# Spacing between the facets.
g.fig.subplots_adjust(wspace=0.3, hspace=0.5)
g.set_axis_labels('Age of Passengers', 'Fare of each Passenger')
<seaborn.axisgrid.FacetGrid at 0x1a712825ba8>

This uses FacetGrid's built-in xlim and ylim arguments to restrict the x and y axes; the same limits can also be applied after plotting:

# Same plot, but the axis limits are applied afterwards with g.set().
g = sns.FacetGrid(data=titanic, col='Survived', hue='Sex', height=5)
g.map(sns.scatterplot, 'Age', 'Fare')
# Spacing between the facets.
g.fig.subplots_adjust(wspace=0.3, hspace=0.5)
g.set_axis_labels('Age of Passengers', 'Fare of each Passenger')
g.set(xlim=(0, 16), ylim=(100, 500))
<seaborn.axisgrid.FacetGrid at 0x1a712875908>

Machine learning bicycle data set

# Load the daily bike-sharing data and preview the first rows.
bike = pd.read_csv(filepath_or_buffer='bike_sharing_daily.csv')
bike.head(n=5)
instant dteday season yr mnth holiday weekday workingday weathersit temp atemp hum windspeed casual registered cnt
0 1 2011-01-01 1 0 1 0 6 0 2 0.344167 0.363625 0.805833 0.160446 331 654 985
1 2 2011-01-02 1 0 1 0 0 0 2 0.363478 0.353739 0.696087 0.248539 131 670 801
2 3 2011-01-03 1 0 1 0 1 1 1 0.196364 0.189405 0.437273 0.248309 120 1229 1349
3 4 2011-01-04 1 0 1 0 2 1 1 0.200000 0.212122 0.590435 0.160296 108 1454 1562
4 5 2011-01-05 1 0 1 0 3 1 1 0.226957 0.229270 0.436957 0.186900 82 1518 1600
# Show the original column labels before they are renamed.
bike.columns
Index(['instant', 'dteday', 'season', 'yr', 'mnth', 'holiday', 'weekday',
       'workingday', 'weathersit', 'temp', 'atemp', 'hum', 'windspeed',
       'casual', 'registered', 'cnt'],
      dtype='object')
# Give the abbreviated columns readable names; the others keep their labels.
bike.rename(
    columns={
        'dteday': 'date',
        'yr': 'year',
        'mnth': 'month',
        'weathersit': 'weathersituation',
        'hum': 'humidity',
        'casual': 'casual_users',
        'registered': 'registered_users',
    },
    inplace=True,
)
# bike.cnt.head(200) bike count per day may go above 8000 per day and some days as low as 100 bikes per day
# Histogram (120 bins) plus rug of the daily rental count, in black.
# The style is set BEFORE the figure is created: style settings are picked
# up when the axes are built, so setting them afterwards would leave this
# figure unstyled.
sns.set(style="whitegrid")
f, ax = plt.subplots(figsize=(15, 5))
# Tick positions could be pinned with ax.set_xticks(range(0, 9500, 500)).
sns.distplot(bike.cnt, bins=120, kde=False, rug=True, color='k', ax=ax)
<matplotlib.axes._subplots.AxesSubplot at 0x1a7126cd550>
# Histogram (120 bins), KDE curve and rug of the daily rental count, in black.
# The style is set BEFORE the figure is created: style settings are picked
# up when the axes are built, so setting them afterwards would leave this
# figure unstyled.
sns.set(style="whitegrid")
f, ax = plt.subplots(figsize=(15, 5))
# Tick positions could be pinned with ax.set_xticks(range(0, 9500, 500)).
sns.distplot(bike.cnt, bins=120, kde=True, rug=True, color='k', ax=ax)
<matplotlib.axes._subplots.AxesSubplot at 0x1a712736e48>
# Number of bikes hired as a function of days passed.
# x and y are passed by keyword, matching the wine jointplot calls earlier in
# this notebook; newer seaborn releases no longer accept them positionally.
sns.jointplot(x=bike.instant, y=bike.cnt, height=8, color='g')
<seaborn.axisgrid.JointGrid at 0x1a7129c1b38>

two peaks in two years

# Rental count against month. x and y are passed by keyword, matching the
# wine jointplot calls earlier in this notebook; newer seaborn releases no
# longer accept them positionally.
sns.jointplot(x=bike.month, y=bike.cnt, height=8, color='g')
<seaborn.axisgrid.JointGrid at 0x1a715dd6da0>

Month-wise counts: as seen, more bicycles are rented in months 8, 9 and 10.

season wise rental data

# Rental count against season. x and y are passed by keyword, matching the
# wine jointplot calls earlier in this notebook; newer seaborn releases no
# longer accept them positionally.
sns.jointplot(x=bike.season, y=bike.cnt)  # counts are higher in season 3 and 4
<seaborn.axisgrid.JointGrid at 0x1a716583198>
# Rental count against three weather columns, coloured by season.
sns.pairplot(
    bike,
    x_vars=['temp', 'humidity', 'windspeed'],
    y_vars='cnt',
    hue='season',
    height=6,
    aspect=1.2,
)
<seaborn.axisgrid.PairGrid at 0x1a716825780>

Themes and Styles

# Global alternative, left disabled: set_style affects every plot that follows.
# sns.set_style('darkgrid')
# f,ax = plt.subplots(figsize=(15,5))
# sns.scatterplot(bike.cnt)
# Scoped alternative: the style only applies to what is created inside `with`.
with sns.axes_style('darkgrid'):
    f, ax = plt.subplots(figsize=(15, 5))
    sns.distplot(bike['cnt'], ax=ax)

How to remove spines

# Draw the joint plot, strip its spines, then reset seaborn and draw it again.
# x and y are passed by keyword, matching the wine jointplot calls earlier in
# this notebook; newer seaborn releases no longer accept them positionally.
sns.jointplot(x=bike.instant, y=bike.cnt, height=8, color='g')
sns.despine(offset=15, trim=True)
sns.set()
sns.jointplot(x=bike.instant, y=bike.cnt, height=8, color='g')
<seaborn.axisgrid.JointGrid at 0x1a716ef27f0>

colors

# Show the palette that is currently active.
current_palette = sns.color_palette()
sns.palplot(current_palette)
# hls -> Hue, Lightness and Saturation
sns.palplot(sns.color_palette('hls', n_colors=8))
sns.palplot(sns.hls_palette(n_colors=15, h=.99, l=0.5, s=.99))
# Interactive chooser for a sequential ColorBrewer palette.
sns.choose_colorbrewer_palette(data_type='sequential')
[(0.9575547866205305, 0.9575547866205305, 0.9575547866205305),
 (0.9012072279892349, 0.9012072279892349, 0.9012072279892349),
 (0.8328950403690888, 0.8328950403690888, 0.8328950403690888),
 (0.7502191464821223, 0.7502191464821223, 0.7502191464821223),
 (0.6434140715109573, 0.6434140715109573, 0.6434140715109573),
 (0.5387158785082661, 0.5387158785082661, 0.5387158785082661),
 (0.440322952710496, 0.440322952710496, 0.440322952710496),
 (0.342883506343714, 0.342883506343714, 0.342883506343714),
 (0.22329873125720878, 0.22329873125720878, 0.22329873125720878),
 (0.10469819300269129, 0.10469819300269129, 0.10469819300269129)]
# Interactive chooser for a diverging ColorBrewer palette.
sns.choose_colorbrewer_palette('diverging')
[(0.6922722029988467, 0.09227220299884642, 0.16770472895040373),
 (0.8392156862745099, 0.376470588235294, 0.3019607843137253),
 (0.9545559400230679, 0.641753171856978, 0.5057285659361783),
 (0.9921568627450982, 0.8588235294117645, 0.7803921568627449),
 (0.9657054978854286, 0.9672433679354094, 0.9680891964628989),
 (0.8196078431372551, 0.8980392156862745, 0.9411764705882353),
 (0.5664744329104192, 0.7687043444828916, 0.8685121107266438),
 (0.26274509803921564, 0.576470588235294, 0.7647058823529411),
 (0.1272587466359093, 0.39584775086505203, 0.6687427912341407)]
# Sequential palettes: 'Blues' and its '_d' variant.
for palette_name in ('Blues', 'Blues_d'):
    sns.palplot(sns.color_palette(palette_name))
sns.choose_colorbrewer_palette(data_type='sequential')
[(0.9575547866205305, 0.9575547866205305, 0.9575547866205305),
 (0.9012072279892349, 0.9012072279892349, 0.9012072279892349),
 (0.8328950403690888, 0.8328950403690888, 0.8328950403690888),
 (0.7502191464821223, 0.7502191464821223, 0.7502191464821223),
 (0.6434140715109573, 0.6434140715109573, 0.6434140715109573),
 (0.5387158785082661, 0.5387158785082661, 0.5387158785082661),
 (0.440322952710496, 0.440322952710496, 0.440322952710496),
 (0.342883506343714, 0.342883506343714, 0.342883506343714),
 (0.22329873125720878, 0.22329873125720878, 0.22329873125720878),
 (0.10469819300269129, 0.10469819300269129, 0.10469819300269129)]
# Two ways of getting a 12-colour cubehelix palette.
sns.palplot(sns.color_palette('cubehelix', n_colors=12))
sns.palplot(sns.cubehelix_palette(n_colors=12))  # good for printing

Diverging color palette

# Pick a palette interactively; the second chooser replaces the first, so the
# sequential palette is the one that ends up in `h`.
h = sns.choose_colorbrewer_palette(data_type='diverging')
h = sns.choose_colorbrewer_palette(data_type='sequential')
# Alcohol against three columns, coloured by pH with the chosen palette.
g = sns.PairGrid(
    wine_data,
    x_vars=['chlorides', 'sulphates', 'fixed_acidity'],
    y_vars=['alcohol'],
    hue='pH',
    palette=h,
    height=8,
)
g.map(sns.scatterplot)  # sns looks more beautiful
# g.add_legend()
<seaborn.axisgrid.PairGrid at 0x1a717b2e160>

Aesthetics

# Return the dictionary of parameters that make up the current axes style.
sns.axes_style()  # current style
{'axes.facecolor': '#EAEAF2',
 'axes.edgecolor': 'white',
 'axes.grid': True,
 'axes.axisbelow': True,
 'axes.labelcolor': '.15',
 'figure.facecolor': 'white',
 'grid.color': 'white',
 'grid.linestyle': '-',
 'text.color': '.15',
 'xtick.color': '.15',
 'ytick.color': '.15',
 'xtick.direction': 'out',
 'ytick.direction': 'out',
 'lines.solid_capstyle': 'round',
 'patch.edgecolor': 'w',
 'image.cmap': 'rocket',
 'font.family': ['sans-serif'],
 'font.sans-serif': ['Arial',
  'DejaVu Sans',
  'Liberation Sans',
  'Bitstream Vera Sans',
  'sans-serif'],
 'patch.force_edgecolor': True,
 'xtick.bottom': False,
 'xtick.top': False,
 'ytick.left': False,
 'ytick.right': False,
 'axes.spines.left': True,
 'axes.spines.bottom': True,
 'axes.spines.right': True,
 'axes.spines.top': True}
# Start from the 'ticks' style and override tick sizes, tick colours and the
# axes background colour.
sns.set_style('ticks', {'xtick.major.size': 8, 'xtick.color': '.15',
                        'ytick.color': '.85', 'ytick.major.size': 10, 'axes.facecolor': 'm'})

# x and y are passed by keyword, matching the wine jointplot calls earlier in
# this notebook; newer seaborn releases no longer accept them positionally.
sns.jointplot(x=bike.instant, y=bike.cnt, height=8, color='k')
<seaborn.axisgrid.JointGrid at 0x1a716d32940>
sns.set()  # reset everything
# x and y are passed by keyword, matching the wine jointplot calls earlier in
# this notebook; newer seaborn releases no longer accept them positionally.
sns.jointplot(x=bike.instant, y=bike.cnt, height=8, color='k')
<seaborn.axisgrid.JointGrid at 0x1a7183b9390>

printable and seminar contexts

# Switch to the 'talk' plotting context and redraw the joint plot.
sns.set_context('talk')
# x and y are passed by keyword, matching the wine jointplot calls earlier in
# this notebook; newer seaborn releases no longer accept them positionally.
sns.jointplot(x=bike.instant, y=bike.cnt, height=8, color='k')
<seaborn.axisgrid.JointGrid at 0x1a71a9aef98>
# Switch to the 'paper' plotting context and redraw the joint plot.
sns.set_context('paper')
# x and y are passed by keyword, matching the wine jointplot calls earlier in
# this notebook; newer seaborn releases no longer accept them positionally.
sns.jointplot(x=bike.instant, y=bike.cnt, height=8, color='k')
<seaborn.axisgrid.JointGrid at 0x1a71b530710>
# Switch to the 'notebook' plotting context and redraw the joint plot.
sns.set_context('notebook')
# x and y are passed by keyword, matching the wine jointplot calls earlier in
# this notebook; newer seaborn releases no longer accept them positionally.
sns.jointplot(x=bike.instant, y=bike.cnt, height=8, color='k')
<seaborn.axisgrid.JointGrid at 0x1a71b193c50>
# Switch to the 'poster' plotting context and redraw the joint plot.
sns.set_context('poster')
# x and y are passed by keyword, matching the wine jointplot calls earlier in
# this notebook; newer seaborn releases no longer accept them positionally.
sns.jointplot(x=bike.instant, y=bike.cnt, height=8, color='k')
<seaborn.axisgrid.JointGrid at 0x1a719c91e80>

continued!!!!!